/* Stata_BootstrapConleyTaber.do */

* Global settings shared by the rest of the script.
global OUTREG2OPTIONS "tex(fragment) 2aster auto(2) rdec(3) label"
global TABLELOC "."
global FE_options "fe cluster(Party_State)"
global numboots 1000
* Fix the random-number seed so the bootstrap draws are reproducible.
set seed 4321

use "Contributions.dta", clear

*
* Declare the panel: unit is Party_State, time is cycle,
* with consecutive periods two units of cycle apart (delta(2)).
*
tsset Party_State cycle, delta(2)

* Flag ever-treated panels: evertreated starts as the within-Party_State
* maximum of less_costly_primary and is then set to 1 wherever the
* within-Party_State maximum of nonpartisan_primary equals 1.
tempvar ever_nonpartisan
quietly egen evertreated = max(less_costly_primary), by(Party_State)
quietly egen `ever_nonpartisan' = max(nonpartisan_primary), by(Party_State)
quietly replace evertreated = 1 if `ever_nonpartisan' == 1
quietly drop `ever_nonpartisan'

*****************************
* Program to sample a residual, add to EY and create yhat,
* a la Conley Taber 2011.
*****************************
capture program drop createYhat
program define createYhat
  * Usage: createYhat evertreated Y EY r
  *
  * Builds (or rebuilds) the variable yhat in the current dataset:
  *   - rows with `evertreated' == 0 get yhat = `Y';
  *   - rows with `evertreated' == 1 get yhat = `EY' + a residual `r'
  *     taken from a randomly chosen `evertreated' == 0 row.
  *
  * Arguments (all are names of existing variables):
  *   evertreated  0/1 indicator separating always-control from ever-treated rows
  *   Y            observed outcome
  *   EY           predicted outcome
  *   r            residuals to sample from
  *
  * Side effects: replaces any existing yhat, re-sorts the data, and
  * consumes two runiform() draws per observation.
  * Exits with r(459) if more than half of the rows are ever-treated, because
  * there would then be too few always-control rows to match against.
  args evertreated Y EY r

  * Guard: each ever-treated row is matched by position to one of the first
  * rows of the always-control group, so controls must be at least as numerous.
  qui summ `evertreated'
  if (r(mean) > 0.5) {
    di as error "Must have less than 50% of sample ever treated for this code." _n
    * Abort with a real error code so the calling loop stops too.
    exit 459
  }

  * Scratch variables; tempvars cannot clash with user variables and are
  * removed automatically when the program ends, even on error.
  tempvar shuffle whch r_new

  * Put ever-treated rows first, in random order, and give each its position.
  * That position is the row whose residual it will borrow after the re-sort.
  qui gen double `shuffle' = runiform()
  gsort -`evertreated' `shuffle'
  qui gen long `whch' = _n

  * Re-sort with always-control rows on top, in a fresh random order, so that
  * positions 1..(number of ever-treated rows) are randomly chosen controls.
  qui replace `shuffle' = runiform()
  sort `evertreated' `shuffle'

  * Borrow the residual held by the matched always-control row.
  * NOTE(review): if `r' is missing on any of those always-control rows, the
  * borrowed residual (and hence yhat) is missing too -- confirm the caller
  * supplies non-missing residuals for every always-control row.
  qui gen double `r_new' = `r'[`whch'] if `evertreated' == 1

  * yhat: Y for always-control rows, EY + borrowed residual for ever-treated.
  * Stored as double so always-control outcomes are copied without rounding.
  capture drop yhat
  qui gen double yhat = `Y' if `evertreated' == 0
  qui replace yhat = `EY' + `r_new' if `evertreated' == 1
end

*****************************
* Bootstrap Conley Taber.
*****************************
quietly generate insample = 1
* Iterate over model specifications (currently only specification 1).
forvalues modelgroups = 1/1 {
  * Choose the treatment variables for this specification.
  if (`modelgroups' == 1) {
    * less_costly and nonpartisan primary indicators.
    global EVs "less_costly_primary nonpartisan_primary"
  }

  ******************
  * For every dependent variable, build a Conley-Taber
  * bootstrap distribution of the treatment coefficients.
  ******************
  foreach var of varlist sumcontribs log_sumcontribs ncontribs log_ncontribs ndonors log_ndonors pct_primary {
    display _n _n "BOOTSTRAPPING NULL DISTRIBUTION FOR `var'"

    * Fixed-effects regression of the outcome on cycle dummies,
    * estimated on always-control observations only.
    quietly xtreg `var' i.cycle if insample == 1 & evertreated == 0, $FE_options

    * Residuals from the estimation sample form the pool of null errors.
    capture drop r
    quietly predict r if e(sample), e

    * Predicted values for all observations, using predict's default statistic.
    capture drop EY
    quietly predict EY

    * Fresh frame with one row per bootstrap draw and one column per
    * treatment variable (any frame left from a previous pass is dropped first).
    capture frame drop boots
    frame create boots
    frame boots: quietly set obs $numboots
    foreach treat of varlist $EVs {
      frame boots: quietly generate boot_`treat' = .
    }

    **********************
    * Bootstrap draws.
    **********************
    forvalues i = 1/$numboots {
      * Progress message every 100 draws.
      if (mod(`i', 100) == 0) {
        display "Bootstrap `i'"
      }

      * Rebuild yhat: ever-treated rows get EY plus a sampled
      * always-control residual (see createYhat).
      createYhat evertreated `var' EY r

      * Re-estimate the diff-in-diff specification on yhat to obtain
      * one draw from the null distribution of the coefficients.
      quietly xtreg yhat $EVs i.cycle if insample == 1, $FE_options

      * Record this draw's coefficients in row `i' of the boots frame.
      foreach treat of varlist $EVs {
        frame boots: quietly replace boot_`treat' = _b[`treat'] in `i'
      }
    }

    ******************
    * Estimate on the real outcome and store the actual coefficients
    * (row 1 only) for comparison with the null distribution.
    ******************
    quietly xtreg `var' $EVs i.cycle if insample == 1, $FE_options
    foreach treat of varlist $EVs {
      frame boots: quietly generate actual_`treat' = _b[`treat'] in 1
    }

    * Write the bootstrap and actual coefficients to CSV; the boots frame
    * itself is dropped at the start of the next dependent variable's pass.
    frame boots: quietly outsheet using "ConleyTaber-`var'.csv", comma replace
  }

}
